Foreign Exchange Rates

Libraries

library(tidyverse)
## ── Attaching packages ────────────────────────────────────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.2     ✓ purrr   0.3.4
## ✓ tibble  3.0.3     ✓ dplyr   1.0.1
## ✓ tidyr   1.1.1     ✓ stringr 1.4.0
## ✓ readr   1.3.1     ✓ forcats 0.5.0
## ── Conflicts ───────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(ggplot2)
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout

DataSet

# Read the exchange-rate CSV and keep only columns 2 through 24 of the file.
daily <- read.csv(file = 'Foreign_Exchange_Rates.csv')[2:24]
# Auto-print the freshly loaded data frame.
daily

Column Names

# List the column headers exactly as read.csv() produced them.
colnames(daily)
##  [1] "Time.Serie"                               
##  [2] "AUSTRALIA...AUSTRALIAN.DOLLAR.US."        
##  [3] "EURO.AREA...EURO.US."                     
##  [4] "NEW.ZEALAND...NEW.ZELAND.DOLLAR.US."      
##  [5] "UNITED.KINGDOM...UNITED.KINGDOM.POUND.US."
##  [6] "BRAZIL...REAL.US."                        
##  [7] "CANADA...CANADIAN.DOLLAR.US."             
##  [8] "CHINA...YUAN.US."                         
##  [9] "HONG.KONG...HONG.KONG.DOLLAR.US."         
## [10] "INDIA...INDIAN.RUPEE.US."                 
## [11] "KOREA...WON.US."                          
## [12] "MEXICO...MEXICAN.PESO.US."                
## [13] "SOUTH.AFRICA...RAND.US."                  
## [14] "SINGAPORE...SINGAPORE.DOLLAR.US."         
## [15] "DENMARK...DANISH.KRONE.US."               
## [16] "JAPAN...YEN.US."                          
## [17] "MALAYSIA...RINGGIT.US."                   
## [18] "NORWAY...NORWEGIAN.KRONE.US."             
## [19] "SWEDEN...KRONA.US."                       
## [20] "SRI.LANKA...SRI.LANKAN.RUPEE.US."         
## [21] "SWITZERLAND...FRANC.US."                  
## [22] "TAIWAN...NEW.TAIWAN.DOLLAR.US."           
## [23] "THAILAND...BAHT.US."
# Replace the long imported headers with short names, in column order.
names(daily) <- c(
  'Date',
  'Australia', 'Euro', 'NewZealand', 'UK', 'Brazil', 'Canada',
  'China', 'HongKong', 'India', 'Korea', 'Mexico', 'SouthAfrica',
  'Singapore', 'Denmark', 'Japan', 'Malaysia', 'Norway', 'Sweden',
  'SriLanka', 'Switzerland', 'Taiwan', 'Thailand'
)

Convert Dates

# Parse the Date column (year-month-day text) into Date objects.
daily[["Date"]] <- as.Date(daily[["Date"]], format = "%Y-%m-%d")
# Show the first rows to check the conversion.
head(daily)

Column Data Type

# Report the class of every column.
sapply(X = daily, FUN = class)
##        Date   Australia        Euro  NewZealand          UK      Brazil 
##      "Date" "character" "character" "character" "character" "character" 
##      Canada       China    HongKong       India       Korea      Mexico 
## "character" "character" "character" "character" "character" "character" 
## SouthAfrica   Singapore     Denmark       Japan    Malaysia      Norway 
## "character" "character" "character" "character" "character" "character" 
##      Sweden    SriLanka Switzerland      Taiwan    Thailand 
## "character" "character" "character" "character" "character"
# Turn the 22 rate columns (positions 2 through 23) from character to numeric.
# Entries that cannot be parsed as numbers become NA, with a coercion warning.
daily[, 2:23] <- sapply(X = daily[2:23], FUN = as.numeric)
## Warning in lapply(X = X, FUN = FUN, ...): NAs introduced by coercion

## Warning in lapply(X = X, FUN = FUN, ...): NAs introduced by coercion

## Warning in lapply(X = X, FUN = FUN, ...): NAs introduced by coercion

## Warning in lapply(X = X, FUN = FUN, ...): NAs introduced by coercion

## Warning in lapply(X = X, FUN = FUN, ...): NAs introduced by coercion

## Warning in lapply(X = X, FUN = FUN, ...): NAs introduced by coercion

## Warning in lapply(X = X, FUN = FUN, ...): NAs introduced by coercion

## Warning in lapply(X = X, FUN = FUN, ...): NAs introduced by coercion

## Warning in lapply(X = X, FUN = FUN, ...): NAs introduced by coercion

## Warning in lapply(X = X, FUN = FUN, ...): NAs introduced by coercion

## Warning in lapply(X = X, FUN = FUN, ...): NAs introduced by coercion

## Warning in lapply(X = X, FUN = FUN, ...): NAs introduced by coercion

## Warning in lapply(X = X, FUN = FUN, ...): NAs introduced by coercion

## Warning in lapply(X = X, FUN = FUN, ...): NAs introduced by coercion

## Warning in lapply(X = X, FUN = FUN, ...): NAs introduced by coercion

## Warning in lapply(X = X, FUN = FUN, ...): NAs introduced by coercion

## Warning in lapply(X = X, FUN = FUN, ...): NAs introduced by coercion

## Warning in lapply(X = X, FUN = FUN, ...): NAs introduced by coercion

## Warning in lapply(X = X, FUN = FUN, ...): NAs introduced by coercion

## Warning in lapply(X = X, FUN = FUN, ...): NAs introduced by coercion

## Warning in lapply(X = X, FUN = FUN, ...): NAs introduced by coercion

## Warning in lapply(X = X, FUN = FUN, ...): NAs introduced by coercion

Handling Missing Values

# Count the missing values in each column.
colSums(is.na(daily))
##        Date   Australia        Euro  NewZealand          UK      Brazil 
##           0         198         198         198         198         198 
##      Canada       China    HongKong       India       Korea      Mexico 
##         198         197         198         199         198         198 
## SouthAfrica   Singapore     Denmark       Japan    Malaysia      Norway 
##         198         198         198         198         198         198 
##      Sweden    SriLanka Switzerland      Taiwan    Thailand 
##         198         198         198         201         198
# Drop every row that contains at least one missing value.
daily <- na.omit(object = daily)

Exploring the Indian Rupee

# Thin line chart of the India column against Date.
ggplot(data = daily, mapping = aes(x = Date, y = India)) +
  geom_line(color = 'royalblue', size = 0.1)

# India and SriLanka on one chart. The colour aesthetic is mapped to constant
# strings so each line gets its own legend entry.
ggplot(data = daily, mapping = aes(x = Date)) +
  geom_line(mapping = aes(y = India, color = 'India'), size = 0.25) +
  geom_line(mapping = aes(y = SriLanka, color = 'SriLanka'), size = 0.25) +
  ylab('Indian Rupee vs Sri Lankan Rupee') +
  scale_color_discrete(
    name = "Currency: Rupee",
    labels = c("India", "Sri Lanka")
  ) +
  theme_minimal()

# Scatter of the India column with a loess smoother drawn on top, without a
# confidence band.
ggplot(data = daily, mapping = aes(x = Date, y = India)) +
  geom_point(color = 'royalblue', size = 0.1, alpha = 0.25) +
  geom_smooth(
    method = 'loess',
    formula = y ~ x,
    se = FALSE,
    color = 'slategrey',
    size = 0.25
  ) +
  ylab('Indian Rupee')

# Base-graphics scatter of India against Date, with the least-squares line.
with(daily, plot(x = Date, y = India, pch = 1, col = "blue"))
abline(reg = lm(India ~ Date, data = daily))

# The same view for the Euro column.
with(daily, plot(x = Date, y = Euro, pch = 1, col = "red"))
abline(reg = lm(Euro ~ Date, data = daily))

# Interactive line chart of India, SriLanka and Euro on a log-scaled y axis.
plot_ly(
  data = daily, x = ~Date, y = ~India,
  type = 'scatter', mode = 'lines', name = 'India'
) %>%
  add_trace(
    y = ~SriLanka,
    type = 'scatter', mode = 'lines', name = 'Sri Lanka'
  ) %>%
  add_trace(
    y = ~Euro,
    type = 'scatter', mode = 'lines', name = 'Euro'
  ) %>%
  layout(yaxis = list(type = "log", title = 'Currency'))
## Warning: `arrange_()` is deprecated as of dplyr 0.7.0.
## Please use `arrange()` instead.
## See vignette('programming') for more help
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
# Interactive comparison of six currencies on a shared, log-scaled y axis.
plot_ly(data = daily, x = ~Date) %>%
  add_trace(
    y = ~Australia, type = 'scatter', mode = 'lines', name = 'Australia'
  ) %>%
  add_trace(y = ~Euro, type = 'scatter', mode = 'lines', name = 'Euro') %>%
  add_trace(y = ~UK, type = 'scatter', mode = 'lines', name = 'UK') %>%
  add_trace(y = ~Canada, type = 'scatter', mode = 'lines', name = 'Canada') %>%
  add_trace(y = ~Japan, type = 'scatter', mode = 'lines', name = 'Japan') %>%
  add_trace(y = ~Korea, type = 'scatter', mode = 'lines', name = 'Korea') %>%
  layout(yaxis = list(type = "log", title = 'Currency'))

Time Series analysis can be classified as:

  1. Parametric and Non-parametric
  2. Linear and Non-linear
  3. Univariate and multivariate

Techniques used for time series analysis:

  1. ARIMA models
  2. Box-Jenkins multivariate models
  3. Holt-Winters exponential smoothing (single, double and triple)

AutoRegressive Integrated Moving Average (ARIMA)

Assumptions of ARIMA

  1. Stationary Data: Stationarity means that the statistical properties of the process generating a time series do not change over time. It does not mean that the series itself does not change over time, only that the way it changes does not itself change over time. In other words, the properties of the series do not depend on the time at which it is observed. A white noise series and a series with cyclic behavior can also be considered stationary series.

  2. Univariate Data

Converting Data.Frame to Time-Series

# Wrap the whole data frame in a ts object declared as monthly, Jan 2000
# through Dec 2019.
# NOTE(review): `daily` appears to hold one row per day, yet the series is
# declared with frequency = 12 -- confirm this is intended.
df <- ts(
  data = daily,
  start = c(2000, 01),
  end = c(2019, 12),
  frequency = 12
)

# Overlay columns 3, 5, 7, 8 and 11 of `daily` on one log-scaled chart, with
# one colour per series.
ts.plot(daily[, c(3, 5, 7, 8, 11)], col = 1:5, log = 'y')
# The legend keys must mirror how the lines are drawn. ts.plot() is given no
# `lty`, so every series uses the same default line type and only the colours
# differ. The earlier legend advertised lty = 1:5, which did not match the plot.
legend("right",
       legend = names(daily[, c(3, 5, 7, 8, 11)]),
       col = 1:5, lty = 1, lwd = 5)

Convert India’s Exchange Rate to time series

# Build the monthly rupee series used by the decomposition and ARIMA steps.
#
# Two defects of the earlier `ts(data = daily[, 11], ...)` call are fixed here:
#   * Column 11 of `daily` is `Korea` (see the names() assignment earlier in
#     this file); the rupee is the `India` column, selected here by name.
#   * `daily` is passed row by row, so ts() with frequency = 12 labelled
#     consecutive rows as consecutive months. The rates are now averaged per
#     calendar month before the series is built.
# NOTE(review): output recorded after this chunk (and after the later chunks
# that use `india`) was produced by the earlier code; re-knit to refresh it.
# NOTE(review): assumes every calendar month from the first observation onward
# still has at least one row after na.omit() -- confirm, otherwise the month
# labels shift.
month_key <- format(daily$Date, "%Y-%m")
# tapply() orders groups by their "YYYY-MM" label, which is chronological.
monthly_mean <- tapply(daily$India, month_key, mean)
# c(year, month) of the first month present in the data.
first_month <- as.integer(strsplit(names(monthly_mean)[1], "-")[[1]])
india <- ts(data = as.numeric(monthly_mean), start = first_month, frequency = 12)
india
##          Jan     Feb     Mar     Apr     May     Jun     Jul     Aug     Sep
## 2000 1128.00 1122.50 1135.00 1146.50 1138.00 1133.50 1147.00 1144.50 1135.50
## 2001 1126.00 1128.00 1124.50 1127.00 1127.80 1125.00 1121.00 1124.00 1127.00
## 2002 1129.60 1129.50 1124.00 1121.00 1115.00 1128.00 1128.00 1126.00 1128.00
## 2003 1144.00 1137.00 1135.00 1131.31 1128.50 1120.50 1120.50 1119.30 1118.70
## 2004 1121.00 1118.40 1118.50 1118.00 1117.30 1118.18 1117.00 1113.00 1110.00
## 2005 1109.40 1107.50 1105.50 1113.00 1114.00 1115.50 1112.00 1107.50 1110.50
## 2006 1111.20 1113.80 1110.00 1109.50 1109.00 1108.50 1108.80 1108.50 1108.10
## 2007 1110.10 1110.00 1111.50 1111.00 1109.70 1109.50 1109.20 1113.00 1113.50
## 2008 1118.80 1122.50 1131.50 1135.00 1133.50 1129.90 1137.50 1137.00 1130.00
## 2009 1117.00 1113.20 1116.00 1115.50 1113.40 1114.40 1114.50 1115.00 1116.40
## 2010 1118.80 1119.50 1118.20 1118.30 1116.50 1115.00 1115.15 1113.90 1117.50
## 2011 1116.70 1115.70 1111.30 1113.00 1113.10 1113.70 1112.30 1111.60 1113.00
## 2012 1115.00 1117.00 1116.90 1115.80 1115.80 1115.00 1116.00 1118.00 1116.50
## 2013 1117.00 1116.30 1115.20 1115.00 1115.10 1114.50 1114.14 1114.80 1114.20
## 2014 1108.60 1108.80 1105.70 1105.50 1107.40 1111.00 1109.50 1109.50 1109.50
## 2015 1140.00 1127.00 1127.00 1128.00 1135.00 1126.00 1120.00 1114.00 1115.90
## 2016 1120.00 1120.00 1119.00 1121.90 1124.00 1124.00 1132.00 1130.00 1136.00
## 2017 1141.00 1138.00 1140.00 1140.00 1138.00 1140.00 1139.00 1138.30 1134.80
## 2018 1136.50 1132.00 1137.00 1141.00 1139.00 1138.70 1141.30 1144.50 1157.00
## 2019 1189.00 1189.00 1204.00 1217.00 1211.00 1220.00 1204.00 1201.00 1201.00
##          Oct     Nov     Dec
## 2000 1125.00 1127.00 1134.00
## 2001 1131.00 1130.50 1130.00
## 2002 1129.00 1135.00 1136.00
## 2003 1119.20 1123.00 1120.20
## 2004 1108.70 1112.10 1112.50
## 2005 1110.00 1108.50 1107.00
## 2006 1111.00 1110.00 1110.00
## 2007 1114.90 1115.00 1115.60
## 2008 1133.00 1125.20 1118.00
## 2009 1122.22 1120.00 1119.30
## 2010 1117.50 1119.00 1118.50
## 2011 1114.20 1116.00 1115.60
## 2012 1115.30 1114.50 1116.00
## 2013 1114.10 1111.90 1110.20
## 2014 1110.00 1115.20 1119.90
## 2015 1115.30 1118.18 1120.00
## 2016 1140.00 1140.00 1132.00
## 2017 1136.30 1135.00 1139.00
## 2018 1168.00 1179.00 1191.00
## 2019 1192.00 1184.00 1191.00
# Plot the `india` time series with a custom title and y-axis label.
plot(
  india,
  main = "Indian Rupee growth 2000 through 2019",
  ylab = 'Rupees'
)

Decompose into time series components

# Split the series into the components returned by decompose(), then plot them
# as stacked panels.
india_components <- decompose(x = india)
plot(india_components)

Unit Root Test

A unit root test checks whether a time series variable is non-stationary and possesses a unit root. The null hypothesis is generally defined as the presence of a unit root, and the alternative hypothesis is either stationarity, trend stationarity or an explosive root, depending on the test used. Unit roots are a feature of some stochastic processes, such as random walks, that can cause problems in statistical inference involving time series models. Note that the KPSS test used below reverses these roles: its null hypothesis is stationarity, and the alternative is the presence of a unit root.

# install.packages("fUnitRoots")
library("fUnitRoots")
## Loading required package: timeDate
## Loading required package: timeSeries
## Loading required package: fBasics
# KPSS test on the series, using the "tau" test type and "short" lag choice,
# with the diagnostic plot switched on.
urkpssTest(
  india,
  type = "tau",
  lags = "short",
  use.lag = NULL,
  doplot = TRUE
)

## 
## Title:
##  KPSS Unit Root Test
## 
## Test Results:
##   NA
## 
## Description:
##  Wed Aug 19 10:31:52 2020 by user:
# First-order difference of the series, then plot the differenced values.
tsstationary <- diff(x = india, differences = 1)
plot(tsstationary)

Calculate Correlation, Covariance

Correlation

Correlation is a statistical technique that can show whether and how strongly pairs of variables are related.

Covariance

Covariance is a measure of the joint variability of two random variables. If the greater values of one variable mainly correspond with the greater values of the other variable, and the same holds for the lesser values, the covariance is positive.

Partial Correlation

In time series analysis, the partial autocorrelation function (PACF) gives the partial correlation of a stationary time series with its own lagged values, after regressing out the values of the time series at all shorter lags. It contrasts with the autocorrelation function (ACF), which does not control for other lags.

# Print the autocorrelations of the series without drawing the correlogram.
acf(india, plot = FALSE)
## 
## Autocorrelations of series 'india', by lag
## 
## 0.0000 0.0833 0.1667 0.2500 0.3333 0.4167 0.5000 0.5833 0.6667 0.7500 0.8333 
##  1.000  0.956  0.915  0.870  0.816  0.756  0.693  0.619  0.557  0.493  0.437 
## 0.9167 1.0000 1.0833 1.1667 1.2500 1.3333 1.4167 1.5000 1.5833 1.6667 1.7500 
##  0.397  0.359  0.322  0.296  0.275  0.261  0.257  0.254  0.246  0.241  0.234 
## 1.8333 1.9167 
##  0.229  0.228
# Print the autocovariances of the series without drawing a plot.
acf(india, type = 'covariance', plot = FALSE)
## 
## Autocovariances of series 'india', by lag
## 
## 0.0000 0.0833 0.1667 0.2500 0.3333 0.4167 0.5000 0.5833 0.6667 0.7500 0.8333 
##    442    422    404    384    361    334    306    274    246    218    193 
## 0.9167 1.0000 1.0833 1.1667 1.2500 1.3333 1.4167 1.5000 1.5833 1.6667 1.7500 
##    175    159    142    131    122    116    113    112    109    106    103 
## 1.8333 1.9167 
##    101    101
# Print the autocovariances of the series without drawing a plot.
# NOTE(review): this repeats the call made just before it -- confirm whether a
# different statistic was intended here.
acf(india, type = 'covariance', plot = FALSE)
## 
## Autocovariances of series 'india', by lag
## 
## 0.0000 0.0833 0.1667 0.2500 0.3333 0.4167 0.5000 0.5833 0.6667 0.7500 0.8333 
##    442    422    404    384    361    334    306    274    246    218    193 
## 0.9167 1.0000 1.0833 1.1667 1.2500 1.3333 1.4167 1.5000 1.5833 1.6667 1.7500 
##    175    159    142    131    122    116    113    112    109    106    103 
## 1.8333 1.9167 
##    101    101
# Draw the autocovariance plot of the series.
acf(india, type = 'covariance')

# Print the partial autocorrelations without drawing a plot.
pacf(india, plot = FALSE)
## 
## Partial autocorrelations of series 'india', by lag
## 
## 0.0833 0.1667 0.2500 0.3333 0.4167 0.5000 0.5833 0.6667 0.7500 0.8333 0.9167 
##  0.956  0.012 -0.071 -0.123 -0.121 -0.062 -0.150  0.091 -0.024  0.075  0.158 
## 1.0000 1.0833 1.1667 1.2500 1.3333 1.4167 1.5000 1.5833 1.6667 1.7500 1.8333 
##  0.003 -0.028  0.002  0.031  0.014  0.063  0.022 -0.085 -0.021 -0.024  0.018 
## 1.9167 
##  0.057
# Draw the partial autocorrelation plot of the series.
pacf(x = india)

Removing Seasonality

# Subtract the seasonal component estimated by decompose() from the series.
timeseriesseasonallyadjusted <- india - india_components[["seasonal"]]
# Difference the seasonally adjusted series once and plot the result.
tsstationary <- diff(x = timeseriesseasonallyadjusted)
plot(tsstationary)

# Autocorrelation plot of the differenced, seasonally adjusted series.
acf(x = tsstationary)

# Autocovariance plot of the same series.
acf(x = tsstationary, type = 'covariance')

# Partial autocorrelation plot of the same series.
pacf(x = tsstationary)

Order specifies the non-seasonal part of the ARIMA model: (p, d, q) refers to the AutoRegression order, the degree of difference, and the MovingAverage order.

Seasonal specifies the seasonal part of the ARIMA model, plus the period (which defaults to frequency(x), i.e. 12 in this case). The argument may be a list with components order and period; if it is given as a numeric vector of length 3 instead, arima() turns it into a suitable list with that vector as the ‘order’.

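Method refers to the fitting method, which can be maximum likelihood (‘ML’), minimizing the conditional sum of squares (‘CSS’), or the combination ‘CSS-ML’. The default is ‘CSS-ML’, which uses conditional sum of squares to find starting values and then maximum likelihood.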

# Hand-specified model: non-seasonal order (1, 1, 1) plus a seasonal AR(1)
# term with period 12, fitted by maximum likelihood.
fitARIMA <- arima(
  x = india,
  order = c(1, 1, 1),
  seasonal = list(order = c(1, 0, 0), period = 12),
  method = "ML"
)
library(lmtest)
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following object is masked from 'package:timeSeries':
## 
##     time<-
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
# Significance tests for the coefficients of the manual model.
coeftest(x = fitARIMA)
## 
## z test of coefficients:
## 
##       Estimate Std. Error z value Pr(>|z|)
## ar1  -0.263108   0.830011 -0.3170   0.7512
## ma1   0.290375   0.825234  0.3519   0.7249
## sar1 -0.080456   0.078604 -1.0236   0.3060
library(forecast)
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo

Manually Configured ARIMA

# Confidence intervals for the coefficients of the manual model.
confint(object = fitARIMA)
##           2.5 %     97.5 %
## ar1  -1.8898999 1.36368472
## ma1  -1.3270534 1.90780339
## sar1 -0.2345172 0.07360442
# Point forecasts and standard errors for the next 20 periods.
predict(object = fitARIMA, n.ahead = 20)
## $pred
##           Jan      Feb      Mar      Apr      May      Jun      Jul      Aug
## 2020 1191.416 1191.349 1190.159 1189.109 1189.593 1188.868 1190.156 1190.397
## 2021 1191.168 1191.174 1191.269 1191.354 1191.315 1191.373 1191.270 1191.250
##           Sep      Oct      Nov      Dec
## 2020 1190.397 1191.121 1191.765 1191.202
## 2021                                    
## 
## $se
##            Jan       Feb       Mar       Apr       May       Jun       Jul
## 2020  4.597650  6.591302  8.089602  9.355187 10.467853 11.473361 12.397524
## 2021 16.814457 17.359770 17.889109 18.403064 18.903093 19.390220 19.865409
##            Aug       Sep       Oct       Nov       Dec
## 2020 13.257434 14.064865 14.828396 15.554491 16.248172
## 2021 20.329493
# Plot the 20-step forecast of the manual model with a 99.5% interval.
plot(
  forecast(fitARIMA, h = 20, level = 99.5)
)

Automatically Configured ARIMA

# Let auto.arima() pick the model orders, printing each candidate it tries.
auto <- auto.arima(y = india, trace = TRUE)
## 
##  Fitting models using approximations to speed things up...
## 
##  ARIMA(2,1,2)(1,0,1)[12] with drift         : 1382.349
##  ARIMA(0,1,0)            with drift         : 1409.073
##  ARIMA(1,1,0)(1,0,0)[12] with drift         : 1377.548
##  ARIMA(0,1,1)(0,0,1)[12] with drift         : 1412.026
##  ARIMA(0,1,0)                               : 1407.821
##  ARIMA(1,1,0)            with drift         : 1410.377
##  ARIMA(1,1,0)(2,0,0)[12] with drift         : 1388.869
##  ARIMA(1,1,0)(1,0,1)[12] with drift         : 1379.26
##  ARIMA(1,1,0)(0,0,1)[12] with drift         : 1411.51
##  ARIMA(1,1,0)(2,0,1)[12] with drift         : 1388.744
##  ARIMA(0,1,0)(1,0,0)[12] with drift         : 1374.78
##  ARIMA(0,1,0)(2,0,0)[12] with drift         : 1385.995
##  ARIMA(0,1,0)(1,0,1)[12] with drift         : 1376.453
##  ARIMA(0,1,0)(0,0,1)[12] with drift         : 1410.051
##  ARIMA(0,1,0)(2,0,1)[12] with drift         : 1385.974
##  ARIMA(0,1,1)(1,0,0)[12] with drift         : 1376.631
##  ARIMA(1,1,1)(1,0,0)[12] with drift         : 1378.26
##  ARIMA(0,1,0)(1,0,0)[12]                    : 1373.959
##  ARIMA(0,1,0)(2,0,0)[12]                    : 1385.048
##  ARIMA(0,1,0)(1,0,1)[12]                    : 1375.566
##  ARIMA(0,1,0)(0,0,1)[12]                    : 1408.914
##  ARIMA(0,1,0)(2,0,1)[12]                    : 1385.56
##  ARIMA(1,1,0)(1,0,0)[12]                    : 1376.57
##  ARIMA(0,1,1)(1,0,0)[12]                    : 1375.723
##  ARIMA(1,1,1)(1,0,0)[12]                    : 1376.922
## 
##  Now re-fitting the best model(s) without approximations...
## 
##  ARIMA(0,1,0)(1,0,0)[12]                    : 1411.776
## 
##  Best model: ARIMA(0,1,0)(1,0,0)[12]
# Confidence intervals for the coefficients of the automatically chosen model.
confint(object = auto)
##           2.5 %    97.5 %
## sar1 -0.2367748 0.0698121
# Point forecasts and standard errors for the next 20 periods.
predict(object = auto, n.ahead = 20)
## $pred
##           Jan      Feb      Mar      Apr      May      Jun      Jul      Aug
## 2020 1191.167 1191.167 1189.915 1188.829 1189.330 1188.579 1189.915 1190.165
## 2021 1190.986 1190.986 1191.091 1191.181 1191.139 1191.202 1191.091 1191.070
##           Sep      Oct      Nov      Dec
## 2020 1190.165 1190.917 1191.584 1191.000
## 2021                                    
## 
## $se
##            Jan       Feb       Mar       Apr       May       Jun       Jul
## 2020  4.609598  6.518956  7.984057  9.219195 10.307374 11.291162 12.195849
## 2021 16.517550 17.049289 17.564938 18.065875 18.553291 19.028226 19.491593
##            Aug       Sep       Oct       Nov       Dec
## 2020 13.037911 13.828793 14.576828 15.288306 15.968115
## 2021 19.944196
# Plot the 20-step forecast of the automatic model with a 99.5% interval.
plot(
  forecast(auto, h = 20, level = 99.5)
)

Diagnosing HyperParameter Configuration Issues

# Autocorrelation plot of the manual model's residuals.
acf(x = fitARIMA$residuals)

library(FitAR)
## Loading required package: lattice
## Loading required package: leaps
## Loading required package: ltsa
## Loading required package: bestglm
## 
## Attaching package: 'FitAR'
## The following object is masked from 'package:forecast':
## 
##     BoxCox
# Ljung-Box test on the residuals of the manual model; the third column of the
# result is plotted as the p-values.
boxresult <- LjungBoxTest(fitARIMA$residuals, k = 2, StartLag = 1)
plot(
  boxresult[, 3],
  main = "Ljung-Box Q Test",
  ylab = "P-values",
  xlab = "Lag"
)

# Normal Q-Q plot of the residuals with its reference line.
qqnorm(y = fitARIMA$residuals)
qqline(y = fitARIMA$residuals)